/***

**Language:** Stata

**Description:** Merges all datasets and creates the main annual sample. This is the first part of the annual sample creation process used to generate some intermediate datasets.

- **Inputs:**
  - __compustat_cleaned.dta (Step 1)
  - COMP_A_dttd_violations.dta (Step 3)

- **Outputs:**
  - __compustat_cleaned_allVars2.dta (Intermediate dataset, used for calculating indirect effects that are merger back in Step 14)
  
***/

global dir your_path
cd "$dir"

use "__compustat_cleaned.dta", clear 
count

** Merge Covenant Violations 
merge 1:1 gvkey fyear using "COMP_A_dttd_violations", keep(1 3) 
drop _merge

	sort gvkey fyear 
	xtset gvkey fyear 
	
	gen capx_rnd_at=(capx+xrd)/l.at
	gen capx_sppe_at=(capx-sppe)/l.at
	gen capx_at=(capx)/l.at
	gen rnd_sales=xrd/l.sale
	gen l_rnd_sales = l.rnd_sales
	gen purchases=(cogs+invch)
	gen buyer_days=360*(ap/purchases)
	gen supplier_days=360*(rectr/sale)
	gen net_tdc_days = buyer_days - supplier_days
	gen sic3=int(sic/10)
	gen sic2=int(sic/100)
	gen sic1=int(sic/1000)

* Winsorize and create new variables that are consistent with the quarterly data: 
	winsor2 capx_at l_logSale l_cf l_ROA l_mtb l_tangibility l_cash l_leverage l_zscore, replace 

	gen ln_sales=logSale 
	gen l_ln_sales=l_logSale
	gen l_tan=l_tangibility

	gen l_viol_confirmed=l.viol_confirmed
	gen viol_confirmed_new=viol_confirmed
	replace viol_confirmed_new=0 if l.viol_confirmed==1 
	gen l_viol_confirmed_new=l.viol_confirmed_new

	gen l_roa=l_ROA 
	gen l_mb=l_mtb
	gen l_bl=l_leverage
	gen bl=leverage
	gen mb=Q 
	gen roa=ROA

* 	Create the leads and lags for slackness. 
	foreach x in  slack_capex slack_int_cov slack_cur_ratio slack_ebitda_debt slack_debt_tangnetworth slack_fx_ch_cov slack_quick_ratio slack_debt_equity slack_ebitda slack_networth slack_tangnetworth slack_net_worth {
		winsor2 `x', replace 
		gen `x'2=`x' 
		replace `x'2=999 if `x'==99 & dttd!=.
		replace `x'2=. if dttd==.
		gen l_`x'2=l.`x'2
	}

	foreach x in dttd dttd2 {
		gen l_`x'=l.`x'
	}
	
	foreach x in SA WW LTD_DUE2 delaycon debtdelaycon  {
		winsor2 `x', replace 
		gen l_`x'=l.`x'
	}
	gen delaycon3=delaycon
	replace delaycon3=0 if delaycon==. 
	gen l_delaycon3=l.delaycon3
	gen delaycon_dummy=(delaycon==.)


*** Combine financial constraint variables together:
	foreach x in SA WW LTD_DUE2 delaycon debtdelaycon {
		su `x'
		gen `x'_std=(`x'-`r(mean)')/`r(sd)'
	}

	gen fin_constraint4= SA_std + WW_std + LTD_DUE2_std + debtdelaycon_std
	gen l_fin_constraint4=l.fin_constraint4
	gen has_capex_cov=(slack_capex2!=. & slack_capex2!=999 & slack_capex2!=99) if slack_capex2!=.
	gen l_has_capex_cov=l.has_capex_cov

*** Create Covenant Ratios (winsor its components first)
	winsor2 at dltt seq act lct oibdp xint prcc_c prcc_f csho lt txditc, replace

	gen rdd_cf_assets=oibdp/((at+l.at)/2)
	gen rdd_lev_ratio=(dlc+dltt)/((at+l.at)/2)
	gen rdd_int_exp_assets=xint/((at+l.at)/2)
	gen rdd_networth_assets=seq/at
	gen rdd_cur_ratio=act/lct
	gen rdd_mkt_to_book = ( prcc_f*csho - (at-lt+txditc) ) / at

	winsor2 rdd_cf_assets rdd_lev_ratio rdd_int_exp_assets rdd_networth_assets rdd_cur_ratio rdd_mkt_to_book, replace 

*** Generate 2nd and 3rd degree polynomials: 
	foreach x in rdd_cf_assets rdd_lev_ratio rdd_int_exp_assets rdd_networth_assets rdd_cur_ratio rdd_mkt_to_book {
		gen l_`x'=l.`x'
		replace `x'=l_`x' 
		drop l_`x'
		gen `x'2=`x'^2
		gen `x'3=`x'^3
	}

***/
  
** There are not enough observations to study in 2018 for violations, so exclude them completely;
replace l_viol_confirmed=. if fyear>=2018

winsor2 buyer_days supplier_days net_tdc_days, replace cuts(1 99)
winsor2 siv sppe at capx, replace

gen l_slcapex= l_has_capex_cov
gen fin_con4=fin_constraint4
gen buyerdays = buyer_days
gen supplierdays = supplier_days


***** ----------- FOR ASSET SUBSTITUTION ----------- *****
* 
*** Create dummies by size:  
astile l_sale_group=l_logSale, nq(2) by(year) 
gen asset_spec = l_rnd_sales
gen asset_spec2 = l_rnd_sales if l_rnd_sales!=0 & l_rnd_sales!=.

astile asset_spec_group=asset_spec if asset_spec!=., nq(2) by(year)
astile asset_spec2_group=asset_spec2 if asset_spec2!=., nq(2) by(year)

gen asset_spec_large=(asset_spec_group==2) if asset_spec_group!=.
gen asset_spec2_large=(asset_spec2_group==2) if asset_spec2_group!=.

* Interaction terms: 
foreach x in WW SA LTD_DUE2 debtdelaycon fin_constraint4 viol_confirmed has_capex_cov dttd2 {
	gen i_asset_spec_`x'=l_`x'*asset_spec_large
	gen i_asset_spec2_`x'=l_`x'*asset_spec2_large
} 


save "__compustat_cleaned_allVars2", replace 
export delimited using "compustat_cleaned_allVars2.csv" , replace 
// NOTE: This is used as an input for the construction of indirect effects, and later merged to the final dataset. 

/*** --------------------------------------- ***
// PART 2:  
- TEMP export the above to CSV and RUN Column sum analysis to generate the "SC_column_sums dataset"
- You also need to have constructed the network by now. (see Readme file: SC Network Construction)
*** --------------------------------------- ***/


*** GO Calculate PARNTERS' DATA ***
use "SCnetwork2", clear // SCnetwork2 is constructed in file: 1_SC_Network_part1.d0 and 1_SC_Network_part2.sas (for the projected network)
gen gvkey=gvkey2 

global keep_vars capx_at WW SA LTD_DUE2 delaycon debtdelaycon fin_constraint4 viol_confirmed dttd2 l_viol_confirmed has_capex_cov ///
	  			     l_slack_capex2 l_dttd2 l_logSale l_cash l_zscore l_roa l_mtb l_bl buyer_days supplier_days

*** Merge Ptn Compustat data
	merge m:1 gvkey fyear using "__compustat_cleaned_allVars2" , keepusing($keep_vars)
	drop if _merge == 2
	drop _merge
*** Merge TNIC data
	merge m:1 gvkey fyear using "TNIC3HHIdata_1989_2019"
	drop if _merge == 2
	drop _merge 

*** Merge column sums
	merge m:1 gvkey fyear using "SC_column_sums", gen(_merge_columnsums)
	drop if _merge_columnsums==2	

	foreach x in $keep_vars tnic3tsimm tnic3hhi tnic3tsimm_group3 tnic3hhi_group3 columnsum {
		rename `x' P_`x'
	}
	drop gvkey
	gen gvkey=gvkey1 
	sort gvkey1 fyear gvkey2

	collapse (mean) P_*   				 ///
			 (count) num_partners=gvkey2 ///
			 (min) P_min_ts = P_tnic3tsimm  P_min_hhi = P_tnic3hhi P_min_ts_g3 = P_tnic3tsimm_group3  /// 
			 [aweight=score], by(gvkey fyear)

save "P_Compustat_cleaned_allVars2", replace 


*** --------------------------------------- ***
// PART 3: 
*** --------------------------------------- ***
*** BACK TO THE FOCAL FIRMS ***

*** Start back from the focal firms
use "__compustat_cleaned_allVars2", clear

*** Merge PARTNERS' data
	merge 1:1 gvkey fyear using "P_Compustat_cleaned_allVars2"
	drop if _merge==2
	drop _merge
*** Merge TNIC data
	merge m:1 gvkey fyear using "TNIC3HHIdata_1989_2019"
	drop if _merge == 2
	drop _merge 
*** Merge centralities
	merge 1:1 gvkey fyear using "centralities_vtnic_1989_2019", gen(_merge_centralities)
	drop if _merge_centralities==2 
*** Merge column sums
	merge m:1 gvkey fyear using "SC_column_sums", gen(_merge_columnsums)
	drop if _merge_columnsums==2	
*** Merge Compustat Names
	gen gvkey_supplier=gvkey
	merge m:1 gvkey_supplier using "compustat_names", keepusing(conm) gen(_merge_names)
	drop if _merge_names==2
*** Merge Citations: 
	merge 1:1 gvkey year using "patents_clean_public_2023-07-11.dta", gen(_merge_citations)
	drop if _merge_citations==2
*** Merge the relationships:
	merge 1:1 gvkey fyear using "SC_rel_link_firm_year", gen(_merge_SC_relationships)
	drop if _merge_SC_relationships==2


	sort gvkey fyear
	xtset gvkey fyear
	gen FC=.
	gen Ptn_FC=.

	winsor2 sale columnsum asset_spec P_tnic3tsimm tnic3tsimm num_patents 
	astile asset_spec_group_med = asset_spec, nq(2) by(sic3 fyear)
	gen asset_spec_large_med = (asset_spec_group_med==2) if asset_spec_group_med!=.

	astile P_TS_group = P_tnic3tsimm if P_tnic3tsimm!=., nq(2) by(sic3 fyear)
	gen P_TS_H = (P_TS_group==2)

	astile TS_H2 = tnic3tsimm, nq(2) by(sic3 fyear)
	replace TS_H2=TS_H2-1

	astile  HHI_H2 = tnic3hhi, nq(2) by(sic3 fyear)
	replace HHI_H2 = HHI_H2-1

	astile patents_H = num_patents, nq(2) by(sic3 fyear)
	replace patents_H = patents_H - 1

	gen patents_per_sales = num_patents_w / sale_w
	winsor2 patents_per_sales
	astile patents_per_sales_H = patents_per_sales, nq(2) by(sic3 fyear)
	replace patents_per_sales_H = patents_per_sales_H - 1


	gen product_diff = -tnic3tsimm
	gen product_diff_H2 = -TS_H2


*** SAVE TEMP DATASET: 
save "__compustat_cleaned_allVars3", replace


*** --------------------------------------- ***
// PART 4: 
*** --------------------------------------- ***

*** Creation of the number of (potential) partners
*****
use "SCnetwork2", clear
    keep if source<3
    gcollapse (nunique) comp_fact_partners = gvkey2, by(gvkey1 fyear supplier)
    reshape wide comp_fact_partners, i(gvkey1 fyear) j(supplier)
    forvalues x=0(1)1 {
        replace comp_fact_partners`x' = 0 if comp_fact_partners`x'==.
    }
    rename (comp_fact_partners0 comp_fact_partners1) (comp_fact_partners_Up comp_fact_partners_Dn)

    gen comp_fact_partners = comp_fact_partners_Up + comp_fact_partners_Dn
    rename gvkey1 gvkey
    duplicates drop
save "SCnetwork2_partners", replace
*****/


*** ----- COMBINE ALL TOGETHER FOR THE FINAL SAMPLE: 
use "__compustat_cleaned_allVars3", clear

   merge 1:1 gvkey fyear using "SC_rel_length_all_directed", gen(_merge_SC) keep(1 3)
   merge 1:1 gvkey fyear using "VT_depth", keep(1 3) gen(_merge_VT_depth)
   merge 1:1 gvkey fyear using "SC_TNIC3HHI_P_by_supplier", keep(1 3) gen(_merge_SC_TNIC_S_C) keepusing(P_tnic3tsimm_S P_tnic3tsimm_C P_tnic3hhi_S P_tnic3hhi_C P_tnic3tsimm_S_g2 P_tnic3tsimm_C_g2 P_tnic3hhi_S_g2 P_tnic3hhi_C_g2)
	merge 1:1 gvkey fyear using "SC_column_sums_No_VTNIC", keep(1 3) gen(_merge_SC_columnsums_noVTNIC)
    *** Merge the block info: 
   merge 1:1 gvkey fyear using "sc_blocks_collapsed", keep(1 3) gen(_merge_sc_blocks_collapsed)
   merge 1:1 gvkey fyear using "SC_alliances_collapsed", keep(1 3) gen(_merge_SC_alliances_collapsed)
	merge 1:1 gvkey fyear using "SCnetwork2_partners", keep(1 3) gen(_merge_SCnetwork2_partners)
   replace num_partners = comp_fact_partners


   winsor2 VT_num_firms VT_scores
   gen ln_VT_scores_w = ln(VT_scores)
   
   gen actual_potential_partners  = num_partners/(num_partners+VT_num_firms_w)
   winsor2 actual_potential_partners
   astile actual_potential_partners_g2  = actual_potential_partners, nq(2)
   gen ln_actual_partners = log(num_partners)
   gen ln_potential_partners = log(VT_num_firms_w)
   gen potential_partners = VT_num_firms_w 
   gen actual_partners = num_partners
   winsor2 potential_partners actual_partners 
   astile P_tnic3hhi_g2 = P_tnic3hhi, nq(2)

   astile columnsum_med = columnsum_w, nq(2) by(fyear sic3)
   replace columnsum_med = columnsum_med - 1
   astile P_columnsum_med = P_columnsum, nq(2) by(fyear sic3)
   replace P_columnsum_med = P_columnsum_med - 1
   gen columnsum_G  = (columnsum>=P_columnsum) if !missing(columnsum_w, P_columnsum)
   gen columnsum_PG = 1-columnsum_G

    winsor2 P_columnsum 
    winsor2 buyer_days supplier_days net_tdc_days, suffix(_w3) cuts(3 97)
    gen ln_buyer_days = ln(buyer_days)
    gen ln_supplier_days = ln(supplier_days)
    gen ln_net_tdc_days = ln(net_tdc_days)

	foreach y in rel_length_run rel_length_run_C rel_length_run_S columnsum_w P_columnsum_w {
		forvalues x = 2(1)2 {
			astile `y'_g`x' = `y', nq(`x') by(fyear)
			gen `y'_high`x' = (`y'_g`x'==`x') if `y'!=.
		}
	}
    gen rel_length_run_ceil = ceil(rel_length_run)
    gen ln_rel_length = ln(rel_length_run)
    egen rel_length_run_max = max(rel_length_run), by(gvkey)

	forvalues x = 2/2 {
		astile columnsum_group`x' = columnsum, nq(`x') by(fyear)
		gen columnsum_high`x' = (columnsum_group`x'==`x')
	}

	foreach x in common_block_max position_1_to_2_max  position_2_to_1_max {
		gen `x'100 = `x'*100
	}
	gen common_alliance_max100 = common_alliance_max*100
    *** Replace these with zero if column_sums are missing, cause that's the filter you also have in the regression: 
    replace common_alliance_max100 =.  if missing(columnsum_w, P_columnsum_w)
    replace position_1_to_2_max100 =.  if missing(columnsum_w, P_columnsum_w)


	*** SUMMARY STATISTICS *** 
	replace l_sale=l_sale/1000
	replace l_at=l_at/1000

*** Add Contextual Effects: (Created in file: TNIC3_Contextual_effects)
	preserve
			import delimited using "tnic3_data.txt", clear
			rename year fyear 
			rename gvkey2 gvkey 
			merge m:1 gvkey fyear using "compustat_cleaned_allVars3", keep(1 3) gen(_merge_tnic3_contextual) ///
				keepusing(l_WW l_SA l_LTD_DUE2 l_debtdelaycon l_fin_constraint4 l_viol_confirmed l_has_capex_cov l_dttd2 l_logSale  l_cash  l_zscore  l_roa  l_mb  l_bl)
			rename gvkey gvkey2
			sort gvkey1 fyear gvkey2
			drop if gvkey1==gvkey2
			*** Collapse and finalize the dataset:
			collapse (mean) l_WW l_SA l_LTD_DUE2 l_debtdelaycon l_fin_constraint4 l_viol_confirmed l_has_capex_cov l_dttd2 l_logSale  l_cash  l_zscore  l_roa  l_mb  l_bl (sum) sum_score=score [aweight=score] , by(gvkey1 fyear)
			foreach x in l_WW l_SA l_LTD_DUE2 l_debtdelaycon l_fin_constraint4 l_viol_confirmed l_has_capex_cov l_dttd2 l_logSale  l_cash  l_zscore  l_roa  l_mb  l_bl {
				rename `x' `x'_
			}
			rename gvkey1 gvkey 
			save "TNIC3_contextual.dta", replace
	restore 

global keep_vars l_WW_ l_SA_ l_LTD_DUE2_ l_debtdelaycon_ l_fin_constraint4_ l_viol_confirmed_ l_has_capex_cov_ l_dttd2_ l_logSale_ l_cash_ l_zscore_ l_roa_ l_mb_ l_bl_
merge 1:1 gvkey fyear using "TNIC3_contextual", keepusing($keep_vars) keep(1 3) gen(_merge_tnic3_contextual)


*** -------------------------- ***
save "__compustat_cleaned_allVars4", replace
count

*** ---------------------------------------------------------- ***
*** Export for USE in Matlab ;
 export delimited using "__compustat_cleaned.csv", replace 

